3.7 合并数据集:Concat 与 Append

本文源码请见我的GitHub

1
2
import numpy as np
import pandas as pd
1
2
3
4
5
def make_df(cols, ind):
    """Build a small demo DataFrame whose cells spell out their own position.

    Each cell holds the column label followed by the row label, e.g. the
    cell in column ``'A'`` at index ``1`` is the string ``'A1'``.

    Args:
        cols: Iterable of column labels (a string such as ``'ABC'`` yields
            one column per character).
        ind: Re-iterable collection of row labels (list, range, ...). It is
            walked once per column and also used as the index, so a one-shot
            iterator would not work here.

    Returns:
        A ``pd.DataFrame`` with ``cols`` as columns and ``ind`` as index.
    """
    data = {c: [str(c) + str(i) for i in ind] for c in cols}
    return pd.DataFrame(data, index=ind)
make_df('ABC', range(4))
A B C
0 A0 B0 C0
1 A1 B1 C1
2 A2 B2 C2
3 A3 B3 C3

pd.concat() 可以简单地合并一维的 Series 或 DataFrame,与 np.concatenate() 合并数组一样

1
2
# Two Series with disjoint integer labels (1-3 and 4-6).
ser1 = pd.Series(list('ABC'), index=[1, 2, 3])
ser2 = pd.Series(list('DEF'), index=[4, 5, 6])
1
2
3
4
5
# Two frames with the same columns (A, B) but different row labels.
df1 = make_df('AB', [1, 2])
df2 = make_df('AB', [3, 4])

# Print each input followed by a blank-line separator ...
for frame in (df1, df2):
    print(frame)
    print('\n')

# ... then the concatenation, which stacks the rows of df2 under df1.
print(pd.concat([df1, df2]))
    A   B
1  A1  B1
2  A2  B2


    A   B
3  A3  B3
4  A4  B4


    A   B
1  A1  B1
2  A2  B2
3  A3  B3
4  A4  B4
1
2
# Same row labels (0, 1) but disjoint column sets: A/B versus C/D.
df3 = make_df('AB', [0, 1])
df4 = make_df('CD', [0, 1])
1
df3
A B
0 A0 B0
1 A1 B1
1
df4
C D
0 C0 D0
1 C1 D1
1
# Concatenate side by side: columns of df4 are placed next to those of df3.
pd.concat([df3, df4], axis='columns')
A B C D
0 A0 B0 C0 D0
1 A1 B1 C1 D1

1.索引重复

1
#ignore_index用来忽略重复的索引
1
2
3
x = make_df('AB', [0, 1])
y = make_df('AB', [2, 3])
# Give y the same row labels as x so the two frames collide on index.
y.index = x.index
1
x
A B
0 A0 B0
1 A1 B1
1
y
A B
0 A2 B2
1 A3 B3
1
# Each frame keeps its own labels, so 0 and 1 both appear twice in the result.
pd.concat([x, y])
A B
0 A0 B0
1 A1 B1
0 A2 B2
1 A3 B3
1
#这里就有重复的索引
1
2
# Ignore the original labels: the result is renumbered 0..n-1.
# `keys=['x', 'y']` was removed from this call. It asks for a hierarchical
# index built from those labels, which ignore_index=True then discards, so
# passing both is contradictory. Use `keys=` on its own to label the pieces.
pd.concat([x, y], ignore_index=True)
A B
0 A0 B0
1 A1 B1
2 A2 B2
3 A3 B3

2.类似join的合并

1
2
3
# Frames whose columns only partly overlap (B and C are shared).
# NOTE(review): [1.2] is a single float label; possibly meant [1, 2] -- confirm.
df5 = make_df('ABC', [1.2])
df6 = make_df('BCD', [3, 4])
print(df5)
print(df6)
# With no `join` argument every column is kept; cells with no data are NaN.
print(pd.concat([df5, df6]))
        A     B     C
1.2  A1.2  B1.2  C1.2
    B   C   D
3  B3  C3  D3
4  B4  C4  D4
        A     B     C    D
1.2  A1.2  B1.2  C1.2  NaN
3.0   NaN    B3    C3   D3
4.0   NaN    B4    C4   D4


D:\Software\Anaconda3\lib\site-packages\ipykernel_launcher.py:3: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.

  This is separate from the ipykernel package so we can avoid doing imports until
1
2
3
4
df5 = make_df('ABC', [1.2])
df6 = make_df('BCD', [3, 4])
print(df5)
print(df6)
# Union merge: join='outer' keeps every column that appears in either frame.
print(pd.concat([df5, df6], join='outer'))
        A     B     C
1.2  A1.2  B1.2  C1.2
    B   C   D
3  B3  C3  D3
4  B4  C4  D4
        A     B     C    D
1.2  A1.2  B1.2  C1.2  NaN
3.0   NaN    B3    C3   D3
4.0   NaN    B4    C4   D4


D:\Software\Anaconda3\lib\site-packages\ipykernel_launcher.py:3: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.

  This is separate from the ipykernel package so we can avoid doing imports until
1
2
3
4
df5 = make_df('ABC', [1.2])
df6 = make_df('BCD', [3, 4])
print(df5)
print(df6)
# Intersection merge: join='inner' keeps only the columns both frames share.
print(pd.concat([df5, df6], join='inner'))
        A     B     C
1.2  A1.2  B1.2  C1.2
    B   C   D
3  B3  C3  D3
4  B4  C4  D4
        B     C
1.2  B1.2  C1.2
3.0    B3    C3
4.0    B4    C4

3.append()

1
2
3
4
# Show both inputs, separated by a blank line, before combining them.
print(df1)
print('\n')
print(df2)
# Method form of the row-wise concatenation shown earlier with pd.concat.
# NOTE: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# on current versions use pd.concat([df1, df2]) to get the same result.
df1.append(df2)
    A   B
1  A1  B1
2  A2  B2


    A   B
3  A3  B3
4  A4  B4
A B
1 A1 B1
2 A2 B2
3 A3 B3
4 A4 B4
1
2